Here's an attempt at understanding Brian Long's code that quickly digressed into me doing different things (sorry!). This is a work-in-progress; I'm looking for feedback + suggestions on my approach.
# IPython setup: auto-reload edited modules on each cell execution,
# then move into the directory holding Brian Long's raw .tif files.
%load_ext autoreload
%autoreload 2
cd blong/raw
# stdlib
from glob import glob
import os, json
from collections import Counter
from copy import deepcopy
from functools import partial
import warnings
# images
import pandas as pd
import numpy as np
import skimage.io
import scipy.io
from scipy import ndimage
from trackpy import bandpass
import cv2
from skimage import img_as_ubyte
from skimage.morphology import white_tophat, disk
# plotting
import matplotlib.pyplot as plt
from showit import image, tile
from starfish.viz import tile_lims
plt.style.use('default')
# starfish
from starfish.io import Stack
Map channels to integer indices. We exclude the DAPI channel, which goes into aux.
# fluorophore name -> integer channel index; DAPI (405) is deliberately
# absent here and is handled as an auxiliary image instead.
fluor_to_file_mapping = dict(zip(("488", "561", "640"), range(3)))
# placeholder for the experiment description (built inline further below)
org_json = {}
experiment.json has 3 sections: data, aux, and metadata.
# collect one record per non-DAPI tif: hyb index, channel index, filename,
# and z-section parsed from the filename.
data = []
for f in glob('*.tif'):
    _, _, suffix = f.partition('Bandpass')
    file_ = os.path.basename(f)
    # bug fix: the original used suffix.strip('.tif'), but str.strip removes
    # *characters* from both ends, not the literal suffix — it only worked by
    # luck for these filenames. splitext drops the extension safely.
    stem, _ext = os.path.splitext(suffix)
    ch, z = stem.split('_')
    if ch == '405':
        continue  # DAPI goes into the aux section, not data
    data.append({
        "hyb": 0,
        "ch": fluor_to_file_mapping[ch],
        "file": file_,
        "z": int(z)
    })
# aux section: non-signal images. Faked for now by listing a single
# representative DAPI plane.
aux = [
    dict(
        type="dapi",
        file='img_000000000_Bandpass405_000.tif',
        format="TIFF",
    )
]
# infer the number of z-sections from the data records: the channel with
# the most files defines the stack depth
channel_counts = Counter(record['ch'] for record in data)
z = max(channel_counts.values())
# experiment-level metadata for org.json
metadata = dict(
    num_hybs=1,
    num_chs=3,
    shape=[2048, 2048, z],
    is_volume=True,
    format="TIFF",
)
Write the full org.json to disk
# serialize the three sections (data, aux, metadata) as org.json
with open('org.json', 'w') as fh:
    json.dump({"data": data, "aux": aux, "metadata": metadata}, fh)
Visualize the metadata file
# shell out to jq to pretty-print just the metadata section
!cat org.json | jq .metadata
Visualize the data organization
# show only the head of the (long) data section
!cat org.json | jq . | head -n 14
!echo "..."
Codebooks should map genes to channels and take on the following (example) structure:
[
{
"codeword": [
{ "h": 0, "c": 0, "v": 1 },
{ "h": 0, "c": 1, "v": 1 },
{ "h": 1, "c": 1, "v": 1 }
],
"gene": "SCUBE2"
},
{
"codeword": [
{ "h": 0, "c": 0, "v": 1 },
{ "h": 1, "c": 1, "v": 1 }
],
"gene": "BRCA"
},...
]
For the provided data, we have only one FOV. Based on this, create a codebook, dump it to disk, and visualize the output:
# one-gene-per-channel codebook for this single-hyb experiment
gene_to_channel = (("Nmnt", "488"), ("Ptprt", "561"), ("Nxph2", "640"))
codebook = [
    {
        "codeword": [{"h": 0, "c": fluor_to_file_mapping[fluor], "v": 1}],
        "gene": gene,
    }
    for gene, fluor in gene_to_channel
]
# dump it next to org.json
with open('codebook.json', 'w') as fh:
    json.dump(codebook, fh)
!cat codebook.json | jq
Here I load the processed results provided by Brian.
def load_blong_results(npy_file):
    """Load smFISH spot-calling results from an .npy dump into a DataFrame.

    Parameters
    ----------
    npy_file : str
        path to an (n_spots, 11) array saved with np.save

    Returns
    -------
    pd.DataFrame
        one row per called spot, indexed by spot_id; the redundant radius
        column is dropped.
    """
    column_names = [
        'x', 'y', 'z', 'brightness', 'radius^2', 'redundant_radius',
        'peak', 'x^2', 'y^2', 'z^2', 'category',
    ]
    results = pd.DataFrame(np.load(npy_file), columns=column_names)
    del results['redundant_radius']
    results.index.name = 'spot_id'
    return results
# load Brian's pre-computed spot calls for each channel of FOV Pos_002_002
res_488 = load_blong_results(os.path.expanduser('~/google_drive/scratch/starfish/brian_long/10-Pos_002_002_488_0.npy'))
res_640 = load_blong_results(os.path.expanduser('~/google_drive/scratch/starfish/brian_long/10-Pos_002_002_640_0.npy'))
res_561 = load_blong_results(os.path.expanduser('~/google_drive/scratch/starfish/brian_long/10-Pos_002_002_561_0.npy'))
# load the experiment described by org.json into a starfish Stack
s = Stack()
s.read('org.json')
# our data is in uint, so cast it there. This should not cause any loss of precision.
# NOTE(review): this writes through the private _data attribute — confirm
# Stack offers no public setter for the image tensor.
s.image._data = s.image._data.astype(np.uint16)
# sample every 5th z-section across all channels for visualization.
# (advanced indexing on the last axis moves the sampled z axis to the front)
subset = s.image.numpy_array[0, :, :, :, np.arange(0, z, 5)]
# per-channel display limits: trim outliers to the 1st-99th percentile range
clim = {
    ch: np.percentile(subset[:, ch, :, :].ravel(), [1, 99])
    for ch in np.arange(subset.shape[1])
}
# invert the fluor -> index map so plot axes can be labelled by fluor name
ch_to_fluor_mapping = {index: fluor for fluor, index in fluor_to_file_mapping.items()}
"""
Visualize channels x z-stack subset
note:
-----
it would be great to be able to explicitly label which dimensions of the image we want to
unfurl on the y and x axes, and to be able to sample images from them. Here I wanted to
view channel and z-section in a grid.
"""
# grid of images: sampled z-sections (rows) x channels (columns)
width = 6
aspect_ratio = 7 / 3
height = width * aspect_ratio
f, axes = plt.subplots(7, 3, figsize=(width, height))
z_sections = np.arange(subset.shape[0])
channels = np.arange(subset.shape[1])
# NOTE(review): this loop reassigns the module-level `z` (the z-section
# count computed earlier); any later cell reading `z` sees the last loop
# value. Consider renaming the loop variable.
for z in z_sections:
    for ch in channels:
        # clip each channel to its own 1-99 percentile display range
        vmin, vmax = clim[ch]
        ax = axes[z, ch]
        ax.imshow(subset[z, ch, :, :], vmin=vmin, vmax=vmax, cmap=plt.cm.gray)
        ax.set_xticks([])
        ax.set_yticks([])
        # label the bottom row with the channel, the left column with z
        if z == max(z_sections):
            ax.set_xlabel('channel: %d\nfluor: %s' % (ch, ch_to_fluor_mapping[ch]), fontsize=16)
        if ch == min(channels):
            ax.set_ylabel('z: %d' % (z * 5), fontsize=16)
Write a bit of code to visualize dynamic ranges of the same images -- they're very different!
# grid of cumulative intensity distributions: z-sections (rows) x channels
# (columns), to compare dynamic ranges.
# bug fix: `cumulative_distribution` was used but never imported anywhere in
# this notebook (NameError at runtime). It lives in skimage.exposure and
# returns (cdf, bin_centers), matching the `y, x = ...` unpacking below.
from skimage.exposure import cumulative_distribution

width = 6
aspect_ratio = 7 / 3
height = width * aspect_ratio
f, axes = plt.subplots(7, 3, figsize=(width, height))
z_sections = np.arange(subset.shape[0])
channels = np.arange(subset.shape[1])
# NOTE(review): like the image-grid loop above, this clobbers the
# module-level `z`.
for z in z_sections:
    for ch in channels:
        ax = axes[z, ch]
        y, x = cumulative_distribution(subset[z, ch, :, :])
        ax.plot(x, y)
        ax.set_xticks([])
        ax.set_yticks([])
        if z == max(z_sections):
            ax.set_xlabel('channel: %d\nfluor: %s' % (ch, ch_to_fluor_mapping[ch]), fontsize=16)
        if ch == min(channels):
            ax.set_ylabel('z: %d' % (z * 5), fontsize=16)
I think this suggests we need to do some kind of normalization of the channels and the z-sections. I'm not doing that right now -- did I miss something in Brian's pipeline code?
For the next section, I look at an individual z-section that seems like it has "middle-of-the-road" s:n.
def clip_vis2d(img, vmin=1, vmax=99, ax=None, size=7):
    """Display an image via showit, clipped to a percentile range.

    Parameters
    ----------
    img : np.ndarray
        2-d image to display
    vmin, vmax : int
        lower / upper percentiles of `img` used as the display clip range
    ax : matplotlib axis, optional
    size : int
        figure size forwarded to showit.image

    Returns
    -------
    whatever showit.image returns
    """
    clip_range = np.percentile(img, [vmin, vmax])
    return image(img, clim=clip_range, ax=ax, size=size)
def plot_spots(img, result_df, z, vmin=1, vmax=99, ax=None, size=2):
    """function to plot the results from brian's pipeline on top of any image,
    called spots are colored by category

    Parameters:
    -----------
    img: np.ndarray
        2-d image
    result_df: pd.Dataframe
        result dataframe containing spot calls that correspond to the image channel
    z: int
        z-plane to plot spot calls for
    vmin, vmax: int
        clipping thresholds for the image plot
    ax, matplotlib.Axes.Axis
        axis to draw into; when None a new 15x15 figure is created

    Returns
    -------
    the matplotlib axis holding the image plus one circle per called spot
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(15, 15))
    clip_vis2d(img, vmin=vmin, vmax=vmax, ax=ax)
    # round the results df z values to the nearest integer section id
    # we'll plot these indices
    inds = np.round(result_df['z']) == z
    # get the data needed to plot
    selected = result_df.loc[inds, ['radius^2', 'x', 'y', 'category']]
    grouped_by_peak_type = selected.groupby('category')
    # one color per spot category; zip silently drops any category beyond
    # the third — assumes at most 3 categories, TODO confirm
    colors = ['r', 'c', 'y']
    for color, (_, group) in zip(colors, grouped_by_peak_type):
        for i in np.arange(group.shape[0]):
            r, x, y, _ = group.iloc[i, :]  # radius is a duplicate, and is present twice
            # NOTE(review): x/y are swapped when drawing (Circle takes
            # (col, row)), and the radius is scaled 10x for visibility —
            # presumably intentional; confirm against the pipeline's units.
            c = plt.Circle((y, x), r * 10, color=color, linewidth=size, fill=False)
            ax.add_patch(c)
    return ax
This is equivalent to the first background clipping that Brian does, but adds a removal of the top percentile
# get an image
# channel index 1 (fluor 561), z-section 15 — chosen above as a
# "middle-of-the-road" signal:noise example
img = s.image.numpy_array[0, 1, :, :, 15]
clip_vis2d(img, 10, 99, size=15)
As far as I can tell, only spots are being called on the left side of the image. Some kind of issue with high-pass filtering, perhaps?
# overlay Brian's 561 calls on the z=15 section
plot_spots(img, res_561, 15, 10, 99)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 40))
clip_vis2d(img, 10, 99, ax=ax1);
# bug fix: this previously passed the leftover loop variable `z` (the last
# value from the plotting loops above), not the z-section 15 that `img` was
# actually taken from — the spot overlay and the image disagreed.
plot_spots(img, res_561, 15, 10, 99, ax=ax2);
Here are both images side by side
He used trackpy to find spots (https://soft-matter.github.io/trackpy/v0.3.2/)
We think this is similar: http://scikit-image.org/docs/dev/auto_examples/segmentation/plot_peak_local_max.html
def floor_image_percentile(img, percentile=10):
    """Subtract a percentile-based background floor from an image.

    Values below the requested percentile are clipped up to it, then the
    floor is subtracted so the background sits at zero.

    Parameters
    ----------
    img : np.ndarray
    percentile : float
        percentile of `img` to use as the background floor (default 10)

    Returns
    -------
    np.ndarray with the floor removed (minimum value is 0)
    """
    # bug fix: the percentile argument was previously ignored — the value 10
    # was hardcoded inside np.percentile regardless of what was passed.
    target = np.percentile(img, percentile)
    return img.clip(target) - target
def blong_bandpass(img):
    """
    carries out a bandpass filter with parameters set as defined in
    particleFinder and particleFinder2.py

    Parameters
    ----------
    img, np.ndarray[np.uint16]

    Returns
    -------
    np.ndarray
        the band-passed image as returned by trackpy.bandpass

    Notes
    -----
    - uses trackpy bandpass filter as implemented
    - 1.5 lshort is too much (comment in code)
    - similar to white top hat filtering?
    """
    # fix: `threshold` was assigned but never used — the call below hardcoded
    # 1 separately. Route the local through so there is one source of truth.
    threshold = 1  # postBandpassThreshold
    bandpass_result = bandpass(
        image=img,
        lshort=0.5,  # from particleFinder2.py
        llong=5,     # strelRadius from particleFinder.py
        threshold=threshold,
        truncate=4,  # truncate is left as default (4)
    )
    return bandpass_result
def post_clip(img, threshold=1):
    """Clip an image at `threshold` after band-passing and re-zero it.

    Everything below `threshold` becomes 0; everything above is shifted
    down by `threshold`.

    Parameters
    ----------
    img : np.ndarray
    threshold : scalar
        floor value applied before the shift (default 1)

    Returns
    -------
    np.ndarray with minimum value 0
    """
    return img.clip(min=threshold) - threshold
First, run the pre-clipping method. This seems to do a reasonable job of removing background.
# apply the percentile floor, then compare three views: the floored image,
# the floored image with high values clipped, and the original clipped
floored_ = floor_image_percentile(img)
f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6))
image(floored_, ax=ax1)
clip_vis2d(floored_, vmin=0, vmax=99, ax=ax2)
clip_vis2d(img, vmin=0, vmax=99, ax=ax3)
for ax in (ax1, ax2, ax3):
    ax.set_xticks([]), ax.set_yticks([])
ax1.set_title('blong floor', fontsize=16);
ax2.set_title('blong floor; high-value removed', fontsize=16);
ax3.set_title('original; high-value removed', fontsize=16);
Next, run the bandpass filter.
# band-pass the floored image and compare it against the floored input
bp_ = blong_bandpass(floored_)
f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6))
image(bp_, ax=ax1)
clip_vis2d(bp_, vmin=0, vmax=99, ax=ax2);
clip_vis2d(floored_, vmin=0, vmax=99, ax=ax3)
for ax in (ax1, ax2, ax3):
    ax.set_xticks([]), ax.set_yticks([])
ax1.set_title('blong bandpass', fontsize=16);
ax2.set_title('blong bandpass\nhigh-value removed', fontsize=16);
ax3.set_title('original\nhigh-value removed', fontsize=16);
In my hands, bandpass turns the images into noise.
We can make the bandpass look a bit better with different parameters, but overall I'm confused about why it degrades the image this much with the pipeline's stated parameters.
I have no guarantee that the below approach works well across the entire z-stack, but I've implemented an approach that appears to identify a super-set of the spots in this image.
A digression: this is a gaussian convolution + a stricter background.
# gaussian smooth, then clip to the 40th-99th percentile range as a
# stricter background removal
filtered_ = ndimage.gaussian_filter(img, 1.5)
# fix: the original unpacked into `min, max`, shadowing the builtins for
# the rest of the notebook session
lo, hi = np.percentile(filtered_, [40, 99])
clipped_ = filtered_.clip(int(lo), int(hi))
image(clipped_)
The mini-pipeline:
# Otsu-threshold the gaussian-filtered image. img_as_ubyte warns about
# precision loss on the uint16 -> uint8 conversion; that loss is accepted
# here, so suppress the warning.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', UserWarning)
    _, otsu_ = cv2.threshold(img_as_ubyte(filtered_), 0, 255, cv2.THRESH_OTSU)
image(otsu_)
Try out some different morphological element sizes
# compare white top-hat results across structuring-element radii
f, axes = plt.subplots(1, 3, figsize=(18, 6))
element_sizes = (3, 5, 9)
results = [white_tophat(otsu_, disk(n)) for n in element_sizes]
for ax, result, elem in zip(axes, results, element_sizes):
    image(result, ax=ax)
    ax.set_title('element size: %d' % elem)
Plot the called peaks on the size=3 disk element results
# overlay Brian's calls on the size-3 top-hat result and on the raw image
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
plot_spots(results[0], res_561, 15, vmin=0, vmax=100, ax=ax1, size=1)
plot_spots(img, res_561, 15, vmin=10, vmax=99, ax=ax2, size=1)
for ax in (ax1, ax2):
    ax.set_xticks([]), ax.set_yticks([])
ax1.set_title('filtered + result peak calls', fontsize=14);
ax2.set_title('original image + result peak calls', fontsize=14);
def blong_mini_pipeline(stack, z, channel, results_df, plot_original=True):
    """Run the gaussian -> Otsu -> white-top-hat mini pipeline on one
    z-section of one channel and overlay Brian's spot calls.

    Parameters
    ----------
    stack: starfish Stack holding the raw image tensor
    z: int
        z-section index to process
    channel: str
        fluor name ('488', '561', '640'), mapped via fluor_to_file_mapping
    results_df: pd.DataFrame
        spot calls for the same channel
    plot_original: bool
        when True, also draw the calls over the raw image in a second panel
    """
    ch = fluor_to_file_mapping[channel]
    img = stack.image.numpy_array[0, ch, :, :, z]
    filtered_ = ndimage.gaussian_filter(img, 1.5)
    # img_as_ubyte warns about precision loss; accepted here
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        _, otsu_ = cv2.threshold(img_as_ubyte(filtered_), 0, 255, cv2.THRESH_OTSU)
    wth_ = white_tophat(otsu_, disk(3))
    if plot_original:
        f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
        plot_spots(img, results_df, z, vmin=10, vmax=99, ax=ax2, size=1)
    else:
        f, ax1 = plt.subplots(figsize=(10,10))
    # always plot the calls over the top-hat result
    plot_spots(wth_, results_df, z, vmin=0, vmax=100, ax=ax1, size=1)
# where are the spots in each section?
# histogram: number of 561 spot calls per (rounded) z-section
Counter(np.round(res_561['z']))
This section has the most spots; our calls agree quite well.
blong_mini_pipeline(s, 17, "561", res_561)
This one is far away, and there is more disagreement.
blong_mini_pipeline(s, 29, "561", res_561)
Try another channel that has good s:n
Counter(round(res_488['z']))
I think the "generalization" tests show that at the very least the way I'm doing this requires better parameter tuning. See e.g. below:
blong_mini_pipeline(s, 18, "488", res_488)
def two_to_three_d(tensor3d, func2d):
    """Apply a 2-d function plane-by-plane over the last (z) axis.

    Parameters
    ----------
    tensor3d : np.ndarray
        (x, y, z) volume
    func2d : callable
        applied independently to each (x, y) plane; must return an array
        of the same plane shape

    Returns
    -------
    np.ndarray of the same shape and dtype as `tensor3d`
    """
    out = np.zeros_like(tensor3d)
    n_planes = tensor3d.shape[-1]
    for plane in range(n_planes):
        out[:, :, plane] = func2d(tensor3d[:, :, plane])
    return out
def stack_apply(stack: Stack, func3d, verbose=True):
    """apply func3d over the 3d tensor defined by each hyb and channel, returning a new stack

    func3d should be a partial function that takes only one parameter: the 3d tensor

    Parameters
    ----------
    stack : Stack
        input stack; not modified — results go into a deep copy
    func3d : callable
        applied to each (hyb, channel) 3-d volume; must return an array of
        the same shape
    verbose : bool
        print progress after each (hyb, channel) pair

    Returns
    -------
    Stack
        new stack whose image data holds the transformed volumes
    """
    # make the new stack
    new_stack = deepcopy(stack)
    n_hybs, n_channels, *_ = stack.image.shape
    # apply over each 3d tensor
    for hyb in np.arange(n_hybs):
        for channel in np.arange(n_channels):
            hyb_view = stack.image.numpy_array[hyb, channel, :]
            # NOTE(review): writes through the private _data buffer of the
            # copied stack — confirm there is no public write path
            new_stack.image._data[hyb, channel, :] = func3d(hyb_view)
            if verbose:
                print("finished processing hyb %d, channel %d" % (hyb, channel))
    return new_stack
def func3d(stack, func2d):
    """Lift a 2-d function to run over every (hyb, channel) volume of a stack.

    Wraps `func2d` with two_to_three_d so it operates plane-by-plane, then
    hands the result to stack_apply.
    """
    plane_applier = partial(two_to_three_d, func2d=func2d)
    return stack_apply(stack=stack, func3d=plane_applier)